In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

Import data

In [3]:
confirmed_url ='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
recovered_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

confirmed = pd.read_csv(confirmed_url)
deaths = pd.read_csv(deaths_url)
recovered = pd.read_csv(recovered_url)
In [3]:
confirmed.head(5)
Out[3]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 11/4/20 11/5/20 11/6/20 11/7/20 11/8/20 11/9/20 11/10/20 11/11/20 11/12/20 11/13/20
0 NaN Afghanistan 33.93911 67.709953 0 0 0 0 0 0 ... 41814 41935 41975 42033 42092 42297 42463 42609 42795 42969
1 NaN Albania 41.15330 20.168300 0 0 0 0 0 0 ... 22300 22721 23210 23705 24206 24731 25294 25801 26211 26701
2 NaN Algeria 28.03390 1.659600 0 0 0 0 0 0 ... 59527 60169 60800 61381 62051 62693 63446 64257 65108 65975
3 NaN Andorra 42.50630 1.521800 0 0 0 0 0 0 ... 5045 5135 5135 5319 5383 5437 5477 5567 5616 5725
4 NaN Angola -11.20270 17.873900 0 0 0 0 0 0 ... 11813 12102 12223 12335 12433 12680 12816 12953 13053 13228

5 rows × 301 columns

In [4]:
deaths.head(5)
Out[4]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 11/4/20 11/5/20 11/6/20 11/7/20 11/8/20 11/9/20 11/10/20 11/11/20 11/12/20 11/13/20
0 NaN Afghanistan 33.93911 67.709953 0 0 0 0 0 0 ... 1548 1554 1554 1556 1558 1574 1577 1581 1591 1595
1 NaN Albania 41.15330 20.168300 0 0 0 0 0 0 ... 536 543 549 557 559 571 579 590 598 605
2 NaN Algeria 28.03390 1.659600 0 0 0 0 0 0 ... 1999 2011 2024 2036 2048 2062 2077 2093 2111 2124
3 NaN Andorra 42.50630 1.521800 0 0 0 0 0 0 ... 75 75 75 75 75 75 75 75 75 75
4 NaN Angola -11.20270 17.873900 0 0 0 0 0 0 ... 296 299 300 303 307 308 308 312 315 317

5 rows × 301 columns

In [5]:
recovered.head(5)
Out[5]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 11/4/20 11/5/20 11/6/20 11/7/20 11/8/20 11/9/20 11/10/20 11/11/20 11/12/20 11/13/20
0 NaN Afghanistan 33.93911 67.709953 0 0 0 0 0 0 ... 34362 34440 34440 34446 34458 34721 34954 34967 35024 35036
1 NaN Albania 41.15330 20.168300 0 0 0 0 0 0 ... 11578 11696 11861 12002 12092 12203 12353 12493 12574 12667
2 NaN Algeria 28.03390 1.659600 0 0 0 0 0 0 ... 41001 41244 41510 41783 42037 42325 42626 42980 42980 43779
3 NaN Andorra 42.50630 1.521800 0 0 0 0 0 0 ... 3734 3858 3858 4043 4248 4332 4405 4488 4585 4675
4 NaN Angola -11.20270 17.873900 0 0 0 0 0 0 ... 5266 5350 5626 5647 5899 5927 6036 6125 6250 6326

5 rows × 301 columns

Modification data

In [4]:
dates = confirmed.columns[4:]

confirmed_df_long = confirmed.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Confirmed'
)
deaths_df_long = deaths.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Deaths'
)
recovered_df_long = recovered.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Recovered'
)
In [21]:
confirmed_df_long.head(5)
Out[21]:
Province/State Country/Region Lat Long Date Confirmed
0 NaN Afghanistan 33.93911 67.709953 1/22/20 0
1 NaN Albania 41.15330 20.168300 1/22/20 0
2 NaN Algeria 28.03390 1.659600 1/22/20 0
3 NaN Andorra 42.50630 1.521800 1/22/20 0
4 NaN Angola -11.20270 17.873900 1/22/20 0
In [22]:
deaths_df_long.head(5)
Out[22]:
Province/State Country/Region Lat Long Date Deaths
0 NaN Afghanistan 33.93911 67.709953 1/22/20 0
1 NaN Albania 41.15330 20.168300 1/22/20 0
2 NaN Algeria 28.03390 1.659600 1/22/20 0
3 NaN Andorra 42.50630 1.521800 1/22/20 0
4 NaN Angola -11.20270 17.873900 1/22/20 0
In [23]:
recovered_df_long.head(5)
Out[23]:
Province/State Country/Region Lat Long Date Recovered
0 NaN Afghanistan 33.93911 67.709953 1/22/20 0
1 NaN Albania 41.15330 20.168300 1/22/20 0
2 NaN Algeria 28.03390 1.659600 1/22/20 0
3 NaN Andorra 42.50630 1.521800 1/22/20 0
4 NaN Angola -11.20270 17.873900 1/22/20 0
In [5]:
DF = pd.merge(confirmed_df_long, deaths_df_long, on = ['Country/Region', 'Lat', 'Long', 'Province/State', 'Date'], how = 'left')
DF = pd.merge(DF, recovered_df_long, on = ['Country/Region', 'Lat', 'Long', 'Province/State', 'Date'], how = 'left')
In [36]:
DF.head(5)
Out[36]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered
0 NaN Afghanistan 33.93911 67.709953 1/22/20 0 0 0.0
1 NaN Albania 41.15330 20.168300 1/22/20 0 0 0.0
2 NaN Algeria 28.03390 1.659600 1/22/20 0 0 0.0
3 NaN Andorra 42.50630 1.521800 1/22/20 0 0 0.0
4 NaN Angola -11.20270 17.873900 1/22/20 0 0 0.0
In [5]:
DF.dtypes
Out[5]:
Province/State     object
Country/Region     object
Lat               float64
Long              float64
Date               object
Confirmed           int64
Deaths              int64
Recovered         float64
dtype: object
In [7]:
DF = DF.rename(columns = {"Country/Region": "Country"}) 
DF['Country'] = DF['Country'].astype('string')
DF['Date'] = pd.to_datetime(DF['Date'])
DF = DF[['Country', 'Date', 'Confirmed', 'Deaths', 'Recovered']]
In [ ]:
DF.set_index('Date', inplace=True)
In [106]:
DF.head()
Out[106]:
Country Confirmed Deaths Recovered
Date
2020-01-22 Afghanistan 0 0 0.0
2020-01-22 Albania 0 0 0.0
2020-01-22 Algeria 0 0 0.0
2020-01-22 Andorra 0 0 0.0
2020-01-22 Angola 0 0 0.0

Function definitions

In [63]:
def give_global_cases(df):
    
    f = plt.figure()
    plt.ticklabel_format(style = 'plain')
    df.groupby(df['Date'].dt.strftime('%B'))['Confirmed'].sum().sort_values().plot(grid=True, figsize=(8,6), title='Covid-19 Global cases in timeline', ax=f.gca())
In [64]:
give_global_cases(DF)
In [61]:
def give_cases_per_country(countries_list):
    
    temp = DF[DF['Country'].isin(countries_list)] 
    f = plt.figure()
    plt.ticklabel_format(style = 'plain')
    temp.groupby(['Country'])['Confirmed', 'Recovered', 'Deaths'].agg('sum').plot(kind='bar', grid=True, rot=0, figsize=(10, 6), ax=f.gca())
In [65]:
give_cases_per_country(['Poland', 'Germany', 'Czechia', 'United Kingdom', 'France'])
C:\Users\ekamrowska\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  
In [39]:
def give_cases_per_country_timeline(x):
    DF2 = DF.query("Country == @x")
    
    cols_plot = ['Confirmed', 'Deaths', 'Recovered']
    axes = DF2[cols_plot].plot(marker='.', alpha=0.5, linestyle='None', figsize=(10, 8), subplots=True, grid=True)
    for ax in axes:
        ax.set_ylabel('Number of cases')
In [40]:
give_cases_per_country_timeline("Poland")
In [41]:
give_cases_per_country_timeline('Germany')
In [10]:
def give_generally_distribution_per_countries(countries_list):
    
    temp = DF[DF['Country'].isin(countries_list)]
    
    fig, ax = plt.subplots(1,3, figsize=(16, 6))
    temp.boxplot('Confirmed','Country', ax=ax[0])
    temp.boxplot('Recovered', 'Country', ax=ax[1])
    temp.boxplot('Deaths','Country', ax=ax[2])
In [11]:
give_generally_distribution_per_countries(['Poland', 'Germany', 'Czechia'])
In [78]:
# based on: https://opensource.com/article/20/4/python-map-covid-19
import pycountry
import plotly.express as px
import pandas as pd

list_countries = DF['Country'].unique()
d_country_code = {}  
for country in list_countries:
    try:
        country_data = pycountry.countries.search_fuzzy(country)
        country_code = country_data[0].alpha_3
        d_country_code.update({country: country_code})
    except:
        print('could not add ISO 3 code for ->', country)
        d_country_code.update({country: ' '})

for k, v in d_country_code.items():
    DF.loc[(DF.Country == k), 'iso_alpha'] = v

fig = px.choropleth(data_frame = DF,
                    locations= "iso_alpha",
                    color= "Confirmed", 
                    hover_name= "Country")
                    #color_continuous_scale= 'RdYlGn', 
                    #animation_frame= "Date")

fig.show()
could not add ISO 3 code for -> Burma
could not add ISO 3 code for -> Congo (Brazzaville)
could not add ISO 3 code for -> Congo (Kinshasa)
could not add ISO 3 code for -> Diamond Princess
could not add ISO 3 code for -> Korea, South
could not add ISO 3 code for -> Laos
could not add ISO 3 code for -> MS Zaandam
could not add ISO 3 code for -> Taiwan*
could not add ISO 3 code for -> West Bank and Gaza
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: